﻿using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.IO;
using PorterStemmerAlgorithm;
using Amazon;
using HLDA;

namespace RankReviews
{
    /// <summary>
    /// Console entry point. Loads books and reviews from tab-separated text files, builds one
    /// HLDA document per review, runs the HLDA model, and writes the topic and ranked-review
    /// output files.
    /// </summary>
    class RankReviews
    {
        // Every character that is not an ASCII letter is treated as a word separator.
        private static readonly Regex NonLetter = new Regex("[^a-zA-Z]");

        /// <summary>
        /// Runs the whole pipeline: load books, load reviews, build documents, process, write output.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            string bookFile = @"..\..\..\..\Books.txt";
            string reviewFile = @"..\..\..\..\Books Reviews.txt";

            Amazon.Amazon amazon = new Amazon.Amazon();
            LoadBooks(amazon, bookFile);
            LoadReviews(amazon, reviewFile);

            HLDA.HLDA hlda = new HLDA.HLDA();
            BuildDocuments(amazon, hlda);

            // NOTE(review): 1000 is presumably the number of sampling iterations -- confirm against HLDA.StartProcessing.
            hlda.StartProcessing(1000);
            hlda.WriteTopicToFile(@"..\..\..\..\3level_Topics.txt");
            amazon.WriteRankedReviews(@"..\..\..\..\3level_RankedReviews.txt");
        }

        /// <summary>
        /// Reads the tab-separated book file and registers one <c>Book</c> per non-empty line.
        /// </summary>
        /// <param name="amazon">Store that receives the books via <c>AddBook</c>.</param>
        /// <param name="bookFile">Path of the book file; it has no header row.</param>
        private static void LoadBooks(Amazon.Amazon amazon, string bookFile)
        {
            // 'using' guarantees the file handle is released even if a line fails to parse.
            using (StreamReader reader = new StreamReader(bookFile))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    if (line.Length == 0)
                    {
                        continue; // tolerate blank lines (e.g. trailing newlines) instead of crashing
                    }

                    string[] tokens = line.Split('\t');
                    // First and second columns become the first and third Book constructor arguments;
                    // the second argument is always left empty.
                    amazon.AddBook(new Book(tokens[0].Trim(), "", tokens[1].Trim()));
                }
            }
        }

        /// <summary>
        /// Reads the tab-separated review file (skipping its header row) and registers one
        /// <c>Review</c> per non-empty line.
        /// </summary>
        /// <param name="amazon">Store that supplies the book lookup and receives the reviews.</param>
        /// <param name="reviewFile">Path of the review file; the first line is a header.</param>
        private static void LoadReviews(Amazon.Amazon amazon, string reviewFile)
        {
            // Numbers in the data file are machine-written, so parse them independently of the
            // current UI culture (otherwise "4.5" breaks on comma-decimal locales).
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            using (StreamReader reader = new StreamReader(reviewFile))
            {
                reader.ReadLine(); // discard the header row

                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    if (line.Length == 0)
                    {
                        continue; // tolerate blank lines instead of crashing
                    }

                    string[] tokens = line.Split('\t');

                    // Column layout: 0 author id, 1 book key, 2 helpful votes, 3 total votes,
                    // 4 rating, 5 summary, 6 review text.
                    // NOTE(review): an earlier comment said the text is in the last two columns;
                    // the fixed indices 5 and 6 are kept here -- confirm no file has extra columns.
                    Review rev = new Review();
                    rev.authorId = tokens[0];
                    rev.book = amazon.books[tokens[1]];
                    rev.helpfulVotes = int.Parse(tokens[2], invariant);
                    rev.totalVotes = int.Parse(tokens[3], invariant);
                    rev.rating = double.Parse(tokens[4], invariant);
                    rev.summary = tokens[5];
                    rev.reviewText = tokens[6];
                    amazon.AddReview(rev);
                }
            }
        }

        /// <summary>
        /// Creates one <c>HldaDoc</c> per review from the words of its summary and text, adds it
        /// to the model, and stores it on the review.
        /// </summary>
        /// <param name="amazon">Store whose <c>reviews</c> are turned into documents.</param>
        /// <param name="hlda">Model that receives each document via <c>AddDocument</c>.</param>
        private static void BuildDocuments(Amazon.Amazon amazon, HLDA.HLDA hlda)
        {
            foreach (Review rev in amazon.reviews)
            {
                HldaDoc doc = new HldaDoc("", Global.maxLevel);
                string combinedText = rev.summary + " " + rev.reviewText;

                foreach (string word in NonLetter.Split(combinedText))
                {
                    // Adjacent separators produce empty fragments; those are not words.
                    if (word.Length == 0)
                    {
                        continue;
                    }

                    doc.AddWord(word);
                }

                hlda.AddDocument(doc);
                rev.doc = doc;
            }
        }
    }
}
